home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_400
/
408_01
/
pccharst.c
< prev
next >
Wrap
Text File
|
1993-08-16
|
19KB
|
518 lines
/*
SNEWS 1.91
pccharst - routines to handle multiple character sets for IBM PCs
Copyright (C) 1993 Daniel Fandrich
<dan@fch.wimsey.bc.ca> or CompuServe 72365,306
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 1, as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
See the file COPYING, which contains a copy of the GNU General
Public License.
Source is formatted with a tab size of 4.
*/
#include "defs.h"
#include "pccharst.h"
#include <dos.h>
#include <ctype.h>
#include <string.h>
#define DEFAULT_CP 437 /* code page number whose mapping is used if the active CP isn't supported */
#define NOCH '?' /* substitute shown for characters that are invalid in the current code page */
/*
 * Code page table selector, held as an enum code_pages value.
 * NOTE(review): presumably this is the first index into cptable[] for the
 * active code page; it is not assigned in this part of the file -- confirm
 * against the code that sets it.
 */
enum code_pages code_page_table;
/*
 * List of DOS code pages known to this module.  Each entry pairs a code
 * page number with its enum code_pages constant, so the constant names
 * used here must match enum code_pages.  The final entry (number 0,
 * CP_NOT_SUPP) marks the end of the list.
 */
struct {
	int code_page_num;
	enum code_pages cptable;
} code_page_list[] = {
	{ 437, CP437 },		/* US */
	{ 850, CP850 },		/* Multilingual/Latin I */
#if 0	/* these aren't in the table yet */
	{ 852, CP852 },		/* Slavic/Latin II */
	{ 857, CP857 },		/* Turkish/Latin V */
	{ 860, CP860 },		/* Portugal */
	{ 861, CP861 },		/* Iceland */
	{ 862, CP862 },		/* Hebrew */
	{ 863, CP863 },		/* Canadian - French */
	{ 865, CP865 },		/* Norway/Denmark */
	{ 866, CP866 },		/* Cyrillic */
	{ 869, CP869 },		/* Greek */
#endif
	{ 0, CP_NOT_SUPP }	/* end of list */
};
/*
 * Character set names, in the same order as enum char_sets (the names
 * must match that enum).  The table is terminated by a NULL pointer.
 *
 * Each entry's comment gives the common name of the set and, where known,
 * the code page that suits it best.
 */
char *char_set_names[/*enum char_sets*/] = {
	/* IBM code page 437 -- code page 437 is best */
	"X-IBM437",
	/* Latin 1 -- code page 850 is best */
	"ISO-8859-1",
	/* Latin 2 -- code page 852 is best */
	"ISO-8859-2",
	/* Latin 3 -- code page 850 is best? */
	"ISO-8859-3",
	/* Latin 4 -- code page 850 is best? */
	"ISO-8859-4",
	/* Latin 5 -- code page 857 is best */
	"ISO-8859-9",
#if 0	/* these aren't in the mapping table yet */
	/* Latin/Cyrillic -- code page 866 is best */
	"ISO-8859-5",
	/* Latin/Arabic */
	"ISO-8859-6",
	/* Latin/Greek -- code page 869 is best */
	"ISO-8859-7",
	/* Latin/Hebrew */
	"ISO-8859-8",
	/* Latin 6 -- code page 852 is best? */
	"ISO-8859-10",
#endif
	/* (must be last in table) equivalent to ISO-8859-1 */
	"US-ASCII",
	/* end-of-table marker */
	NULL
};
/* cptable[code_pages][char_sets][character code] */
char cptable[2 /*8*/][6/*12*/][0x80] = {
{ /* code page 437 mappings */
/* Map from code page 437 to code page 437 */
{
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
},
/* Map from ISO 8859-1 to code page 437 */
{
/* 128 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 136 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 144 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 152 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 160 */ 0xFF,0xAD,0x9B,0x9C,0x0F,0x9D,0x7C,0x15,
/* 168 */ '"', '@', 0xA6,0xAE,0xAA,0xC4,NOCH,'-',
/* 176 */ 0xF8,0xF1,0xFD,'3', 0x27,0xE6,0x14,0xFA,
/* 184 */ ',', '1', 0xA7,0xAF,0xAC,0xAB,NOCH,0xA8,
/* 192 */ 'A', 'A', 'A', 'A', 0x8E,0x8F,0x92,0x80,
/* 200 */ 'E', 0x90,'E', 'E', 'I', 'I', 'I', 'I',
/* 208 */ 'D', 0xA5,'O', 'O', 'O', 'O', 0x99,'X',
/* 216 */ 0xE9,'U', 'U', 'U', 0x9A,'Y', NOCH,0xE1,
/* 224 */ 0x85,0xA0,0x83,'a', 0x84,0x86,0x91,0x87,
/* 232 */ 0x8A,0x82,0x88,0x89,0x8D,0xA1,0x8C,0x8B,
/* 240 */ NOCH,0xA4,0x95,0xA2,0x93,'o', 0x94,0xF6,
/* 248 */ 0xED,0x97,0xA3,0x96,0x81,'y', NOCH,0x98
},
/* Map from ISO 8859-2 to code page 437 */
{
/* 128 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 136 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 144 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 152 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 160 */ 0xFF,'A', NOCH,'L', 0x0F,'L', 'S', 0x15,
/* 168 */ '"', 'S', 'S', 'T', 'Z', 0xC4,'Z', 'Z',
/* 176 */ 0xF8,'a', NOCH,'l', 0x27,'l', 's', NOCH,
/* 184 */ ',', 's', 's', 't', 'z', '"', 'z', 'z',
/* 192 */ 'R', 'A', 'A', 'A', 0x8E,'L', 'C', 0x80,
/* 200 */ 'C', 0x90,'E', 'E', 'E', 'I', 'I', 'D',
/* 208 */ 'D', 'N', 'N', 'O', 'O', 'O', 0x99,'X',
/* 216 */ 'R', 'U', 'U', 'U', 0x9A,'Y', 'T', 0xE1,
/* 224 */ 'r', 0xA0,0x83,'a', 0x84,'l', 'c', 0x87,
/* 232 */ 'c', 0x82,'e', 0x89,'e', 0xA1,0x8C,'d',
/* 240 */ 0xEB,'n', 'n', 0xA2,0x93,'o', 0x94,0xF6,
/* 248 */ 'r', 'u', 0xA3,'u', 0x81,'y', 't', 0xF9
},
/* Map from ISO 8859-3 to code page 437 */
{
/* 128 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 136 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 144 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 152 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 160 */ 0xFF,'H', NOCH,0x9C,0x0F,NOCH,'H', 0x15,
/* 168 */ '"', 'I', 'S', 'G', 'J', 0xC4,NOCH,'Z',
/* 176 */ 0xF8,'h', 0xFD,'3', 0x27,0xE6,'h', 0xFA,
/* 184 */ ',', 'i', 'd', 'g', 'j', 0xAB,NOCH,'z',
/* 192 */ 'A', 'A', 'A', NOCH,0x8E,'C', 'C', 0x80,
/* 200 */ 'E', 0x90,'E', 'E', 'I', 'I', 'I', 'I',
/* 208 */ NOCH,0xA5,'O', 'O', 'O', 'G', 0x99,'X',
/* 216 */ 'G', 'U', 'U', 'U', 0x9A,'U', 'S', 0xE1,
/* 224 */ 0x85,0xA0,0x83,NOCH,0x84,'c', 'c', 0x87,
/* 232 */ 0x8A,0x82,0x88,0x89,0x8D,0xA1,0x8C,0x8B,
/* 240 */ NOCH,0xA4,0x95,0xA2,0x93,'g', 0x94,0xF6,
/* 248 */ 'g', 0x97,0xA3,0x96,0x81,'u', 's', 0xF9
},
/* WHAT IS GREENLANDIC K (162) AND LAPPISH ENG (189,191)? */
/* Map from ISO 8859-4 to code page 437 */
{
/* 128 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 136 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 144 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 152 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 160 */ 0xFF,'A', 'k', 'R', 0x0F,'I', 'L', 0x15,
/* 168 */ '"', 'S', 'E', 'G', 'T', 0xC4,'Z', '-',
/* 176 */ 0xF8,'a', NOCH,'r', 0x27,'i', 'l', NOCH,
/* 184 */ ',', 's', 'e', 'g', 't', NOCH,'z', NOCH,
/* 192 */ 'A', 'A', 'A', 'A', 0x8E,0x8F,0x92,'I',
/* 200 */ 'C', 0x90,'E', 'E', 'E', 'I', 'I', 'I',
/* 208 */ 'D', 'N', 'O', 'K', 'O', 'O', 0x99,'X',
/* 216 */ 0xE9,'U', 'U', 'U', 0x9A,'U', 'U', 0xE1,
/* 224 */ 'a', 0xA0,0x83,'a', 0x84,0x86,0x91,'i',
/* 232 */ 'c', 0x82,'e', 0x89,'e', 0xA1,0x8C,'i',
/* 240 */ 0xEB,'n', 'o', 'k', 0x93,'o', 0x94,0xF6,
/* 248 */ 0xED,'u', 0xA3,0x96,0x81,'u', 'u', 0xF9
},
/* Map from ISO 8859-9 to code page 437 */
{
/* 128 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 136 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 144 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 152 */ NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,NOCH,
/* 160 */ 0xFF,0xAD,0x9B,0x9C,0x0F,0x9D,0x7C,0x15,
/* 168 */ '"', '@', 0xA6,0xAE,0xAA,0xC4,NOCH,'-',
/* 176 */ 0xF8,0xF1,0xFD,'3', 0x27,0xE6,0x14,0xFA,
/* 184 */ ',', '1', 0xA7,0xAF,0xAC,0xAB,NOCH,0xA8,
/* 192 */ 'A', 'A', 'A', 'A', 0x8E,0x8F,0x92,0x80,
/* 200 */ 'E', 0x90,'E', 'E', 'I', 'I', 'I', 'I',
/* 208 */ 'G', 0xA5,'O', 'O', 'O', 'O', 0x99,'X',
/* 216 */ 0xE9,'U', 'U', 'U', 0x9A,'I', 'S', 0xE1,
/* 224 */ 0x85,0xA0,0x83,'a', 0x84,0x86,0x91,0x87,
/* 232 */ 0x8A,0x82,0x88,0x89,0x8D,0xA1,0x8C,0x8B,
/* 240 */ 'g', 0xA4,0x95,0xA2,0x93,'o', 0x94,0xF6,
/* 248 */ 0xED,0x97,0xA3,0x96,0x81,'i', 's', 0x98
}
},
{ /* code page 850 mappings */
/* Map from code page 437 to code page 850 */
{
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
0x98,0x99,0x9A,0xBD,0x9C,0xBE,NOCH,0x9F,
0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
0xA8,0xDA,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
0xB0,0xB1,0xB2,0xB3,0xB4,0